/**Updated Sept 06, 2016 */
/** AF: Updated Aug 1, 2017 */
/**This File Creates the Firm-HS-Country-Month Dataset (split by AL/RP) for ER Project, exploring various quantity definitions */

/* Note this file does not at the moment create 2012-2013 years. For that see exp_raw_explore.sas*/

libname fromlftd  "";
libname outloc "";

/** *******************************************************************************/
/** 			SUMMARY OF THIS FILE					 */
/**										*/
/** FILES THAT MUST BE RUN FIRST -- NONE 				       */
/** SECTION 1: Imports 							      */
/** 	Part 1. 1993-2000 Years						     */
/**		Step 1 Read in and Restrict variables 			    */
/** 		Step 2 Create Extracts 					   */
/**		Step 3 Collapse Down to EIN Alpha HS Country 		  */
/**		       Month QTY1_Con QTY1_IM for NON-RELATED TRANSACTIONS */
/**		Step 4 Collapse Down to EIN Alpha HS Country 		*/
/**		       Month QTY1_Con QTY1_IM for RELATED TRANSACTIONS  */
/** 		Step 5 Now Append Everything Together 		      */
/**		 Output Data in Stata Format 			     */
/**     Part 2: 2001-2011 years (same steps as above)               */
/**     Part 3: Same as above but with SWT instead		   */	
/** OUTPUT alt_mon_exp_&year.dta	   		          */
/** ********************************************************/

/** This file takes the HS --> SIC/NAICS concording process from Offshoring/lfttdprep_exp.sas */
/** I generally follow that structure */


/** ***********************************************************/
/** PART A. TRADE VALUES				     */
/** *********************************************************/

/** ***********************************************************/
/** SECTION 1. 2008 - 2009 YEARS: NAICS			     */
/** *********************************************************/


/**MACRO to loop through years */
%macro byyear(year);


/** Step 4.1 Read in Concordance Data for LFTTD */
	
	proc import out = hs_naics_export
	FILE="hs_sic_naics_exports_89_109.csv"
	dbms= CSV replace;
	run;

	data hs_naics_export;
		set hs_naics_export (keep = commodity naics year);
		if year + 1900 = &year;
		hst = trim(left(commodity));
		hs = '0000000000';
		if length(hst) lt xx then substr(hs,11-length(hst))=hst;
		else hs = hst;	
	run;

	proc sort data=hs_naics_export (drop = year hst commodity);
		by hs;
	run;


/**Step 4.2 Read in and Restrict variables */
data expyr;
	set fromlftd.exp_comb&year. (keep = HS ORIG_MON COUNTRY QTY1 RELATED VALUE firmid 
	mot qty1_con qty1_imp );

/* We don't want to undercount the value of trade, so keeping all transactions but maintaining the flags for problematic transactions*/
	/*remove if qty is imputed */
	/*IF qty_1_im="Q" OR qty_1_im="S" OR qty_1_im="V" OR qty_1_im="W" then delete;*/
	/*im=Q "qty1 created from qty2 and edit master quantity ratio factor" */
	/*im=S "qt1 created by multiplying reported qty1 with edit master scale factor" */
	/*im=V "qty1 created by dividing value with edit master price factor" */
	/*im=W "qty1 created by dividing swt by edit master vessel or air shipping weight" */

	/*remove if qty is converted */
	/*IF qty_1_cv="1" OR qty_1_cv="2" OR qty_1_cv="3" then delete;*/
	/*cv=1 "conversion using edit master unit of quantity" */
	/*cv=2 "changed to zero because quantity non-convertible with edit master unit of quantity" */
	/*cv=3 "qty1 and qty2 were switched based on edit master unit of quantity listed for each" */

	/*remove if qty is missing or zero */
	/*IF qty_1=0 OR qty_1=. then delete;*/
	/* if firmid="" then delete; */
	
	month2 = 00;
	month2=ORIG_MON;
	
run;

data expyr;
	set expyr;
	quarter = 1;
	if month2=4 OR month2=5 OR month2=6 THEN quarter=2;
	if month2=7 OR month2=8 OR month2=9 THEN quarter=3;
	if month2=10 OR month2=11 OR month2=12 THEN quarter=4;
	qty1_zero=0;
	if qty1=0 OR qty1=. THEN qty1_zero=1;
run;

/** Step 4.3 Merge in NAICS Information -- Do for each year to remove any that don't match */
	proc sort data=expyr;
		by hs;
	run;

	data expyrnaics1;
		merge expyr (in=data1) hs_naics_export (in=data2);
		by hs;
		if data1 and not data2 then m_var=1;
		if not data1 and data2 then m_var=2;
		if data1 and data2 then m_var=3;
	run;

	proc freq data=expyrnaics1;
		tables m_var;
	run;

	data expyrnaics;
		set expyrnaics1;
		if m_var=2 then delete;
	run;


/** Step 4.4 Create Extracts */
/* Related Extract */
data exprel;
	set expyrnaics (drop=ORIG_MON);
	if related="Y" OR related="R";
	/*unitvalrel = value/qty_1;*/
	/*unitvalswt = value/swt;*/
	year = &year;
	relvalue = value;
	rename month2=month;
run;

/* Nonrelated Extract */
data expnonrel;
	set expyrnaics (drop=ORIG_MON);
	if related~="N" then delete;
	/*unitvalnonrel = value/qty_1;*/
	year = &year;
	rename month2=month;
run;


/**Step 4.5 Collapse Down to EIN Alpha HS Country Month qty1_im qty1_con qty1_zero for NON-RELATED TRANSACTIONS*/
proc sort data=expnonrel;
	by firmid  country month hs naics qty1_imp qty1_con qty1_zero;
run;

proc means data=expnonrel NOPRINT;
	by firmid  country month hs naics qty1_imp qty1_con qty1_zero;
	var value qty1;
	output out = monexpnonrel sum(value qty1) = nonrelvalue nonrelqty;
run;

data monexpnonrel;
	set monexpnonrel;
	rename _freq_=numtransnonrel;
	year = &year;
run;

/**Step 4 Collapse Down to EIN Alpha HS Country Month qty1_im qty1_con qty1_zero for RELATED TRANSACTIONS*/
proc sort data=exprel;
	by firmid  country month hs naics qty1_imp qty1_con qty1_zero;
run;

proc means data=exprel NOPRINT;
	by firmid  country month hs naics qty1_imp qty1_con qty1_zero;
	var value qty1;
	output out = monexprel sum(value qty1) = relvalue relqty;
run;

data monexprel;
	set monexprel;
	rename _freq_=numtransrel;
	year = &year;
run;

/**Step 5 Now Append Everything Together */
data monexp;
	set monexprel monexpnonrel;
	
run;

proc export data = monexp
	outfile = "exp_mon_&year"
	dbms = stata replace;
run;

%mend;

%byyear(2008);
%byyear(2009);



/** ***********************************************************/
/** SECTION 5. 2010 - 2011 YEARS			     */
/** *********************************************************/

/** Year 2010 */
%let year = 2010;

/** Step 5.1 Read in Concordance Data */
proc import out = hs_naics_export_10
	FILE="CONCORD12_2010_EXPORT2.csv"
	dbms= CSV replace;
run;
	
data hs_naics_export;
	set hs_naics_export_10 (keep=commodity naics);
	hst = trim(left(commodity));
	hs = '0000000000';
	if length(hst) lt xx then substr(hs,11-length(hst))=hst;
	else hs = hst;
run;

proc sort data=hs_naics_export ;
	by hs;
run;

/**Step 5.2 Read in and Restrict variables */
data expyr;
	set fromlftd.exp_comb&year. (keep = HS ORIG_MON COUNTRY QTY1 RELATED VALUE firmid 
	mot qty1_con qty1_imp );

/* We don't want to undercount the value of trade, so keeping all transactions but maintaining the flags for problematic transactions*/
	/*remove if qty is imputed */
	/*IF qty_1_im="Q" OR qty_1_im="S" OR qty_1_im="V" OR qty_1_im="W" then delete;*/
	/*im=Q "qty1 created from qty2 and edit master quantity ratio factor" */
	/*im=S "qt1 created by multiplying reported qty1 with edit master scale factor" */
	/*im=V "qty1 created by dividing value with edit master price factor" */
	/*im=W "qty1 created by dividing swt by edit master vessel or air shipping weight" */

	/*remove if qty is converted */
	/*IF qty_1_cv="1" OR qty_1_cv="2" OR qty_1_cv="3" then delete;*/
	/*cv=1 "conversion using edit master unit of quantity" */
	/*cv=2 "changed to zero because quantity non-convertible with edit master unit of quantity" */
	/*cv=3 "qty1 and qty2 were switched based on edit master unit of quantity listed for each" */

	/*remove if qty is missing or zero */
	/*IF qty_1=0 OR qty_1=. then delete;*/
	/* if firmid="" then delete; */
	month2 = 00;
	month2=ORIG_MON;
	
run;

data expyr;
	set expyr;
	quarter = 1;
	if month2=4 OR month2=5 OR month2=6 THEN quarter=2;
	if month2=7 OR month2=8 OR month2=9 THEN quarter=3;
	if month2=10 OR month2=11 OR month2=12 THEN quarter=4;
	qty1_zero=0;
	if qty1=0 OR qty1=. THEN qty1_zero=1;
run;

/** Step 4.3 Merge in NAICS Information -- Do for each year to remove any that don't match */
	proc sort data=expyr;
		by hs;
	run;

	data expyrnaics1;
		merge expyr (in=data1) hs_naics_export (in=data2);
		by hs;
		if data1 and not data2 then m_var=1;
		if not data1 and data2 then m_var=2;
		if data1 and data2 then m_var=3;
	run;

	proc freq data=expyrnaics1;
		tables m_var;
	run;

	data expyrnaics;
		set expyrnaics1;
		if m_var=2 then delete;
	run;


/** Step 4.4 Create Extracts */
/* Related Extract */
data exprel;
	set expyrnaics (drop=ORIG_MON);
	if related="Y" OR related="R";
	/*unitvalrel = value/qty_1;*/
	/*unitvalswt = value/swt;*/
	year = &year;
	relvalue = value;
	rename month2=month;
run;

/* Nonrelated Extract */
data expnonrel;
	set expyrnaics (drop=ORIG_MON);
	if related~="N" then delete;
	/*unitvalnonrel = value/qty_1;*/
	year = &year;
	rename month2=month;
run;


/**Step 4.5 Collapse Down to EIN Alpha HS Country Month qty1_im qty1_con qty1_zero for NON-RELATED TRANSACTIONS*/
proc sort data=expnonrel;
	by firmid  country month hs naics qty1_imp qty1_con qty1_zero;
run;

proc means data=expnonrel NOPRINT;
	by firmid  country month hs naics qty1_imp qty1_con qty1_zero;
	var value qty1;
	output out = monexpnonrel sum(value qty1) = nonrelvalue nonrelqty;
run;

data monexpnonrel;
	set monexpnonrel;
	rename _freq_=numtransnonrel;
	year = &year;
run;

/**Step 4 Collapse Down to EIN Alpha HS Country Month qty1_im qty1_con qty1_zero for RELATED TRANSACTIONS*/
proc sort data=exprel;
	by firmid  country month hs naics qty1_imp qty1_con qty1_zero;
run;

proc means data=exprel NOPRINT;
	by firmid  country month hs naics qty1_imp qty1_con qty1_zero;
	var value qty1;
	output out = monexprel sum(value qty1) = relvalue relqty;
run;

data monexprel;
	set monexprel;
	rename _freq_=numtransrel;
	year = &year;
run;

/**Step 5 Now Append Everything Together */
data monexp;
	set monexprel monexpnonrel;
	
run;

proc export data = monexp
	outfile = "exp_mon_&year"
	dbms = stata replace;
run;


/** Year 2011 */
%let year = 2011;

/** Step 5.1 Read in Concordance Data */
proc import out = hs_naics_export_11
	FILE="/CONCORD01_2011_EXPORT2.csv"
	dbms= CSV replace;
run;

data hs_naics_export;
	set hs_naics_export_11 (keep=commodity naics);
	hst = trim(left(commodity));
	hs = '0000000000';
	if length(hst) lt xx then substr(hs,11-length(hst))=hst;
	else hs = hst;
run;

proc sort data=hs_naics_export ;
	by hs;
run;

/**Step 5.2 Read in and Restrict variables */
data expyr;
	set fromlftd.exp_comb&year. (keep = HS ORIG_MON COUNTRY QTY1 RELATED VALUE firmid 
	mot qty1_con qty1_imp );

/* We don't want to undercount the value of trade, so keeping all transactions but maintaining the flags for problematic transactions*/
	/*remove if qty is imputed */
	/*IF qty_1_im="Q" OR qty_1_im="S" OR qty_1_im="V" OR qty_1_im="W" then delete;*/
	/*im=Q "qty1 created from qty2 and edit master quantity ratio factor" */
	/*im=S "qt1 created by multiplying reported qty1 with edit master scale factor" */
	/*im=V "qty1 created by dividing value with edit master price factor" */
	/*im=W "qty1 created by dividing swt by edit master vessel or air shipping weight" */

	/*remove if qty is converted */
	/*IF qty_1_cv="1" OR qty_1_cv="2" OR qty_1_cv="3" then delete;*/
	/*cv=1 "conversion using edit master unit of quantity" */
	/*cv=2 "changed to zero because quantity non-convertible with edit master unit of quantity" */
	/*cv=3 "qty1 and qty2 were switched based on edit master unit of quantity listed for each" */

	/*remove if qty is missing or zero */
	/*IF qty_1=0 OR qty_1=. then delete;*/
	/* if firmid="" then delete; */
	month2 = 00;
	month2=ORIG_MON;
	
run;

data expyr;
	set expyr;
	quarter = 1;
	if month2=4 OR month2=5 OR month2=6 THEN quarter=2;
	if month2=7 OR month2=8 OR month2=9 THEN quarter=3;
	if month2=10 OR month2=11 OR month2=12 THEN quarter=4;
	qty1_zero=0;
	if qty1=0 OR qty1=. THEN qty1_zero=1;
run;

/** Step 4.3 Merge in NAICS Information -- Do for each year to remove any that don't match */
	proc sort data=expyr;
		by hs;
	run;

	data expyrnaics1;
		merge expyr (in=data1) hs_naics_export (in=data2);
		by hs;
		if data1 and not data2 then m_var=1;
		if not data1 and data2 then m_var=2;
		if data1 and data2 then m_var=3;
	run;

	proc freq data=expyrnaics1;
		tables m_var;
	run;

	data expyrnaics;
		set expyrnaics1;
		if m_var=2 then delete;
	run;


/** Step 4.4 Create Extracts */
/* Related Extract */
data exprel;
	set expyrnaics (drop=ORIG_MON);
	if related="Y" OR related="R";
	/*unitvalrel = value/qty_1;*/
	/*unitvalswt = value/swt;*/
	year = &year;
	relvalue = value;
	rename month2=month;
run;

/* Nonrelated Extract */
data expnonrel;
	set expyrnaics (drop=ORIG_MON);
	if related~="N" then delete;
	/*unitvalnonrel = value/qty_1;*/
	year = &year;
	rename month2=month;
run;


/**Step 4.5 Collapse Down to EIN Alpha HS Country Month qty1_im qty1_con qty1_zero for NON-RELATED TRANSACTIONS*/
proc sort data=expnonrel;
	by firmid  country month hs naics qty1_imp qty1_con qty1_zero;
run;

proc means data=expnonrel NOPRINT;
	by firmid  country month hs naics qty1_imp qty1_con qty1_zero;
	var value qty1;
	output out = monexpnonrel sum(value qty1) = nonrelvalue nonrelqty;
run;

data monexpnonrel;
	set monexpnonrel;
	rename _freq_=numtransnonrel;
	year = &year;
run;

/**Step 4 Collapse Down to EIN Alpha HS Country Month qty1_im qty1_con qty1_zero for RELATED TRANSACTIONS*/
proc sort data=exprel;
	by firmid  country month hs naics qty1_imp qty1_con qty1_zero;
run;

proc means data=exprel NOPRINT;
	by firmid country month hs naics qty1_imp qty1_con qty1_zero;
	var value qty1;
	output out = monexprel sum(value qty1) = relvalue relqty;
run;

data monexprel;
	set monexprel;
	rename _freq_=numtransrel;
	year = &year;
run;

/**Step 5 Now Append Everything Together */
data monexp;
	set monexprel monexpnonrel;
	
run;

proc export data = monexp
	outfile = "exp_mon_&year"
	dbms = stata replace;
run;



/************************************************************************
*** 2012 -- use 2011 concordance for lack of better option (~x% don't match)
*************************************************************************/
/** Do Year = 2012 */

%let year = 2012;

/** Step 5.1 Read in Concordance Data */
proc import out = hs_naics_export_11
	FILE="CONCORD01_2011_EXPORT2.csv"
	dbms= CSV replace;
run;

data hs_naics_export;
	set hs_naics_export_11 (keep=commodity naics);
	hst = trim(left(commodity));
	hs = '0000000000';
	if length(hst) lt xx then substr(hs,11-length(hst))=hst;
	else hs = hst;
run;

proc sort data=hs_naics_export ;
	by hs;
run;

/**Step 5.2 Read in and Restrict variables */
data expyr;
	set fromlftd.exp_comb&year. (keep = HS ORIG_MON COUNTRY QTY1 RELATED VALUE firmid 
	mot qty1_con qty1_imp );

/* We don't want to undercount the value of trade, so keeping all transactions but maintaining the flags for problematic transactions*/
	/*remove if qty is imputed */
	/*IF qty_1_im="Q" OR qty_1_im="S" OR qty_1_im="V" OR qty_1_im="W" then delete;*/
	/*im=Q "qty1 created from qty2 and edit master quantity ratio factor" */
	/*im=S "qt1 created by multiplying reported qty1 with edit master scale factor" */
	/*im=V "qty1 created by dividing value with edit master price factor" */
	/*im=W "qty1 created by dividing swt by edit master vessel or air shipping weight" */

	/*remove if qty is converted */
	/*IF qty_1_cv="1" OR qty_1_cv="2" OR qty_1_cv="3" then delete;*/
	/*cv=1 "conversion using edit master unit of quantity" */
	/*cv=2 "changed to zero because quantity non-convertible with edit master unit of quantity" */
	/*cv=3 "qty1 and qty2 were switched based on edit master unit of quantity listed for each" */

	/*remove if qty is missing or zero */
	/*IF qty_1=0 OR qty_1=. then delete;*/
	/* if firmid="" then delete; */
	month2 = 00;
	month2=ORIG_MON;
	
run;

data expyr;
	set expyr;
	quarter = 1;
	if month2=4 OR month2=5 OR month2=6 THEN quarter=2;
	if month2=7 OR month2=8 OR month2=9 THEN quarter=3;
	if month2=10 OR month2=11 OR month2=12 THEN quarter=4;
	qty1_zero=0;
	if qty1=0 OR qty1=. THEN qty1_zero=1;
run;

/** Step 4.3 Merge in NAICS Information -- Do for each year to remove any that don't match */
	proc sort data=expyr;
		by hs;
	run;

	data expyrnaics1;
		merge expyr (in=data1) hs_naics_export (in=data2);
		by hs;
		if data1 and not data2 then m_var=1;
		if not data1 and data2 then m_var=2;
		if data1 and data2 then m_var=3;
	run;

	proc freq data=expyrnaics1;
		tables m_var;
	run;

	data expyrnaics;
		set expyrnaics1;
		if m_var=2 then delete;
	run;


/** Step 4.4 Create Extracts */
/* Related Extract */
data exprel;
	set expyrnaics (drop=ORIG_MON);
	if related="Y" OR related="R";
	/*unitvalrel = value/qty_1;*/
	/*unitvalswt = value/swt;*/
	year = &year;
	relvalue = value;
	rename month2=month;
run;

/* Nonrelated Extract */
data expnonrel;
	set expyrnaics (drop=ORIG_MON);
	if related~="N" then delete;
	/*unitvalnonrel = value/qty_1;*/
	year = &year;
	rename month2=month;
run;


/**Step 4.5 Collapse Down to EIN Alpha HS Country Month qty1_im qty1_con qty1_zero for NON-RELATED TRANSACTIONS*/
proc sort data=expnonrel;
	by firmid  country month hs naics qty1_imp qty1_con qty1_zero;
run;

proc means data=expnonrel NOPRINT;
	by firmid  country month hs naics qty1_imp qty1_con qty1_zero;
	var value qty1;
	output out = monexpnonrel sum(value qty1) = nonrelvalue nonrelqty;
run;

data monexpnonrel;
	set monexpnonrel;
	rename _freq_=numtransnonrel;
	year = &year;
run;

/**Step 4 Collapse Down to EIN Alpha HS Country Month qty1_im qty1_con qty1_zero for RELATED TRANSACTIONS*/
proc sort data=exprel;
	by firmid  country month hs naics qty1_imp qty1_con qty1_zero;
run;

proc means data=exprel NOPRINT;
	by firmid country month hs naics qty1_imp qty1_con qty1_zero;
	var value qty1;
	output out = monexprel sum(value qty1) = relvalue relqty;
run;

data monexprel;
	set monexprel;
	rename _freq_=numtransrel;
	year = &year;
run;

/**Step 5 Now Append Everything Together */
data monexp;
	set monexprel monexpnonrel;
	
run;

proc export data = monexp
	outfile = "exp_mon_&year"
	dbms = stata replace;
run;